rm(list = ls())
require(stats4)
require(nlWaldTest)
require("multiwayvcov")
require(msm)
require(stargazer)
require(mfx)
require(restriktor)

data21=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_december6noon_type7clean.csv", header=TRUE)
data22=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_december15_type7clean.csv", header=TRUE)
data23=read.csv("C:/Users/naten/OneDrive - University of Tennessee/Dean Stuff/data/columbia_experiment_january26_type7(clean).csv", header=TRUE)

dataframe21=data.frame(data21)
dataframe22=data.frame(data22)
dataframe23=data.frame(data23)

#some definitions
expframe=rbind(dataframe21,dataframe22,dataframe23)
expframe=subset(expframe,uid>800)
expframe$correct <-(expframe$state==4&(expframe$response==2|expframe$response==7|expframe$response==8|expframe$response==9))|(expframe$state==3&(expframe$response==1|expframe$response==6|expframe$response==3|expframe$response==5))
expframe$incentive <- (expframe$qid==5)*5+(expframe$qid==1)*40+(expframe$qid==6)*70+(expframe$qid==7)*95
accuracy=c(sum((expframe$incentive==5)*expframe$correct)/sum(expframe$incentive==5),sum((expframe$incentive==40)*expframe$correct)/sum(expframe$incentive==40),sum((expframe$incentive==70)*expframe$correct)/sum(expframe$incentive==70),sum((expframe$incentive==95)*expframe$correct)/sum(expframe$incentive==95))
incentivec=c(5,40,70,95)
uidlist=unique(expframe$uid)

#check N
#**
length(uidlist)
#**



#for ease of factor use
expframe$incentive5=(expframe$incentive==5)
expframe$incentive40=(expframe$incentive==40)
expframe$incentive70=(expframe$incentive==70)
expframe$incentive95=(expframe$incentive==95)

#More definitions
expframe$Bresponse <-(expframe$response==2|expframe$response==7|expframe$response==8|expframe$response==9)
expframe$Bstate <- expframe$state==4
expframe$Aresponse <-(expframe$response==1|expframe$response==6|expframe$response==3|expframe$response==5)
expframe$Astate <- expframe$state==3



#Check NIAS and NIAC on the individual level
#NIAS


NIASAframe=data.frame(uid=vector(),A_state_avg_effect=vector(),Pvalue=vector(),incentive=vector())
for (i in 1:length(uidlist)){
for(j in 1:4){
addframe=0
playframe=subset(expframe,uid==uidlist[i] & incentive==incentivec[j])
if(sum(playframe$Aresponse)==0|sum(playframe$Bresponse)==0){
addframe=data.frame(uid=uidlist[i],A_state_coefficient=0,Pvalue=1,incentive=incentivec[j])
NIASAframe=rbind(NIASAframe,addframe)
}else{
model=logitmfx(Aresponse~Astate,data=playframe,robust=TRUE) 
addframe=data.frame(uid=uidlist[i],A_state_coefficient=model[[1]][1],Pvalue=model[[1]][4],incentive=incentivec[j])
NIASAframe=rbind(NIASAframe,addframe)
}
}
}

#Check NIAC 

restrictmat=matrix(c(0,1,0,0, 0,-1,1,0, 0,0,-1,1),nrow=3,ncol=4, byrow=TRUE)

NIACindiframe=data.frame(uid=vector('numeric'),fstat=vector('numeric'),pvalue=vector('numeric'))
for(k in 1:length(uidlist)){
playframe=subset(expframe,uid==uidlist[k])
if(uidlist[k]!=2305){
playmodel=glm(correct~incentive40+incentive70+incentive95,data=playframe,family=binomial())
playtest=conTest(playmodel,constraints=restrictmat,type="B",rhs=c(0,0,0))
addframe=data.frame(uid=uidlist[k],fstat=playtest$Ts,pvalue=playtest$pvalue)
}
#for subject 2305 who we check separately
if(uidlist[k]==2305){
addframe=data.frame(uid=uidlist[k],fstat=0.01,pvalue=0.99)
}
NIACindiframe=rbind(NIACindiframe,addframe)
}
sum(NIACindiframe$pvalue<0.05)/length(NIACindiframe$pvalue)

#NIAS and NIAC together
comboframe=data.frame(uid=vector('numeric'),NIASviol=vector('numeric'),NIACviol=vector('numeric'))
for(i in 1:length(uidlist)){
NIACplayframe=subset(NIACindiframe,uid==uidlist[i])
NIASplayframe=subset( NIASAframe,uid==uidlist[i])
addframe=data.frame(uid=uidlist[i],NIASviol=(sum((NIASplayframe$A_state_coefficient<0)*(NIASplayframe$Pvalue<0.05))>=1),NIACviol=(NIACplayframe$pvalue<0.05))
comboframe=rbind(comboframe,addframe)
}

comboframeclean=comboframe
NIASonly=100*sum((comboframeclean$NIASviol==1)*(comboframeclean$NIACviol==FALSE))/length(uidlist)
NIAConly=100*sum((comboframeclean$NIASviol==0)*(comboframeclean$NIACviol==TRUE))/length(uidlist)
both=100*sum((comboframeclean$NIASviol==1)*(comboframeclean$NIACviol==TRUE))/length(uidlist)
neither=100*sum((comboframeclean$NIASviol==0)*(comboframeclean$NIACviol==FALSE))/length(uidlist)
violatereport=data.frame(violate=c("NIAS only","NIAC only","Both","Neither"),percent=c(NIASonly,NIAConly,both,neither))


goodlist=comboframe$uid*(1-comboframe$NIASviol)*(1-comboframe$NIACviol)

expframe=subset(expframe,uid %in% goodlist)

#Base Regression
expframe$incentivedec=expframe$incentive/100
modelNIAS=lm(Aresponse~factor(Astate+incentivedec)+0,data=expframe)

#Adjust COV
vcovmodNIAS=vcovHC(modelNIAS, type="HC3")
vcovmodNIAScluster=cluster.vcov(modelNIAS, expframe$uid)


#Prob correct vs incentive
Textstr=vector("character")
#Aggregate test
Textstr[1]="((b[6]+(1-b[2]))-(b[5]+(1-b[1])))"
waldNIAC1=nlWaldtest(modelNIAS,texts=Textstr,Vcov=vcovmodNIAScluster)
waldNIAC1

Textstr[1]="((b[7]+(1-b[3]))-(b[5]+(1-b[1])))"
waldNIAC2=nlWaldtest(modelNIAS,texts=Textstr,Vcov=vcovmodNIAScluster)
waldNIAC2

#*incentive comparison*******************************************************************
Textstr[1]="((b[8]+(1-b[4]))-(b[5]+(1-b[1])))"
waldNIAC3=nlWaldtest(modelNIAS,texts=Textstr,Vcov=vcovmodNIAScluster)
waldNIAC3
#*************************************************************************************

Textstr[1]="((b[7]+(1-b[3]))-(b[6]+(1-b[2])))"
waldNIAC4=nlWaldtest(modelNIAS,texts=Textstr,Vcov=vcovmodNIAScluster)
waldNIAC4

Textstr[1]="((b[8]+(1-b[4]))-(b[6]+(1-b[2])))"
waldNIAC5=nlWaldtest(modelNIAS,texts=Textstr,Vcov=vcovmodNIAScluster)
waldNIAC5

Textstr[1]="((b[8]+(1-b[4]))-(b[7]+(1-b[3])))"
waldNIAC6=nlWaldtest(modelNIAS,texts=Textstr,Vcov=vcovmodNIAScluster)
waldNIAC6


#data cleaning
chooseB=vector('numeric')
for(i in 1:length(uidlist)){
playframe=subset(expframe,uid==uidlist[i])
chooseB[i]=(sum(playframe$Bresponse)>5)
}

zeroedlist=chooseB*uidlist
cleanlist=zeroedlist[zeroedlist>0]

incentive=c(5,40,70,95)

uidlist=unique(expframe$uid)

Predicted=vector('numeric')
lambda=vector('numeric')
lowactual=vector('numeric')
Actual=vector('numeric')
for(i in 1:length(uidlist)){
indiframelow=subset(expframe,uid==uidlist[i]&incentive==40)
lowactual[i]=sum(indiframelow$correct)/length(indiframelow$correct)
lambda[i]=min(1000,40/(log(lowactual[i]/(1-lowactual[i]))))
Predicted[i]=exp(70/lambda[i])/(1+exp(70/lambda[i]))
if(lowactual[i]==1){
Predicted[i]=1
}
indiframehigh=subset(expframe,uid==uidlist[i]&incentive==70)
Actual[i]=sum(indiframehigh$correct)/length(indiframehigh$correct)
if(Predicted[i]<0.5){
Predicted[i]=NA
Actual[i]=NA
}
}

Predicted=na.omit(Predicted)
Actual=na.omit(Actual)
#Figure A2.2 *********************************************
plot(Predicted,Actual,lwd=2, font.lab=2, font.axis=1.2,xlim=c(0.4,1), ylim=c(0.4,1))
box(lwd=1, col="grey")
grid(nx = 0.1, ny =NULL , col = "gray", lty=1)
lines(c(0,1),c(0,1), lwd=2)
#*********************************************




sigdiffframe=data.frame(uid=vector('numeric'),lowinc=vector('numeric'),highinc=vector('numeric'),coef=vector('numeric'),pval=vector('numeric'))
for(i in 1:length(uidlist)){
for(j in 1:3){
for(k in (j+1):4){
playframe=subset(expframe,uid==uidlist[i]&(incentive==incentivec[j]|incentive==incentivec[k]))

playframe$highinc=(playframe$incentive==incentivec[k])
model=glm(correct~incentive+highinc+0,data=playframe,family=binomial(link='logit'))
#Note that coef is reversed due to a quirk of model definitions
addframe=data.frame(uid=uidlist[i],lowinc=incentivec[j],highinc=incentivec[k],coef=summary(model)$coefficients[2,1],pval=summary(model)$coefficients[2,4])

if(sum(playframe$correct*(1-playframe$highinc))/sum(1-playframe$highinc)>0.5){
sigdiffframe=rbind(sigdiffframe,addframe)
}
}
}
}

smalluidlist=unique(sigdiffframe$uid)
fastslowframe=data.frame(uid=vector('numeric'),tooslow=vector('numeric'),toofast=vector('numeric'))
for(i in 1:length(smalluidlist)){
playframe=subset(sigdiffframe,uid==smalluidlist[i])
slow=(sum((playframe$pval<0.05)*(playframe$coef>0))>=1)
fast=(sum((playframe$pval<0.05)*(playframe$coef<0))>=1)
addframe=data.frame(uid=uidlist[i],tooslow=slow,toofast=fast)
fastslowframe=rbind(fastslowframe,addframe)
}

#comparisons
#total
length(sigdiffframe$uid)
#total too fast
sum((sigdiffframe$pval<0.05)*(sigdiffframe$coef<0))
#total too slow
sum((sigdiffframe$pval<0.05)*(sigdiffframe$coef>0))

#individuals
length(smalluidlist)
#only too fast
sum(fastslowframe$toofast*(1-fastslowframe$tooslow))
#only too slow
sum(fastslowframe$tooslow*(1-fastslowframe$toofast))
#both
sum(fastslowframe$tooslow*fastslowframe$toofast)
#neither
sum((1-fastslowframe$tooslow)*(1-fastslowframe$toofast))





